# analyze_mutations.py
import pandas as pd

def analyze_mutations_dataframe(df, discard_deletions=False):
    """
    Classify the mutations listed in each row of a mutation DataFrame.

    Each row's 'positions' and 'bases' cells are read as comma-separated
    strings (spaces are ignored) and paired up element by element. A base of
    'Deletion' records the position as a deletion, 'Insertion' records it as
    an insertion, and any other value is recorded as a substitution tuple
    ``(position, base)``. If the two lists differ in length, the unpaired
    trailing entries are ignored.

    Parameters:
        df (pd.DataFrame): The input DataFrame. It must have 'positions' and
            'bases' columns; other columns are carried through unchanged.
            Missing cells (None / NaN / pd.NA) and empty or whitespace-only
            strings are treated as "no mutations". The input is not modified.
        discard_deletions (bool): When True, a row is flagged with
            ``discard=True`` as soon as a deletion is encountered and the
            rest of that row is not analyzed (mutations recorded before the
            deletion are kept).

    Returns:
        pd.DataFrame: A new DataFrame with a fresh 0..n-1 index holding the
        original columns followed by 'perfect_match', 'insertions',
        'ambiguous_deletions', 'deletions', 'substitutions' and 'discard'.
        'ambiguous_deletions' is always an empty list here; the column is
        kept so the output shape stays stable. 'perfect_match' is True only
        for rows that were not discarded and have no recorded mutation.

    Raises:
        ValueError: If an entry of 'positions' is not an integer literal.
    """
    # Fixed column order so that an empty input still yields every column.
    result_columns = [
        'perfect_match',
        'insertions',
        'ambiguous_deletions',
        'deletions',
        'substitutions',
        'discard',
    ]

    def is_missing(cell):
        # NaN is the only float that is not equal to itself; checking it this
        # way avoids calling pd.isna on non-scalar cells.
        return (
            cell is None
            or cell is pd.NA
            or (isinstance(cell, float) and cell != cell)
        )

    def parse_list_string(cell):
        # Turn a comma-separated cell into a list of non-empty tokens.
        if is_missing(cell):
            return []
        if not isinstance(cell, str):
            if not cell:
                return []
            cell = str(cell)
        # Dropping empty tokens makes '' and whitespace-only cells parse to []
        # instead of [''], which would break the int() conversion below.
        return [token for token in cell.replace(' ', '').split(',') if token]

    analyzed_rows = []

    for _, row in df.iterrows():
        positions = [int(token) for token in parse_list_string(row['positions'])]
        bases = parse_list_string(row['bases'])

        insertions = []
        deletions = []
        substitutions = []
        discard = False

        for position, base in zip(positions, bases):
            if base == 'Deletion':
                if discard_deletions:
                    discard = True
                    break  # No need to continue analyzing this row
                deletions.append(position)
            elif base == 'Insertion':
                insertions.append(position)
            else:
                # Any other token is taken to be the substituted base itself.
                substitutions.append((position, base))

        # Test list emptiness, not element truthiness: position 0 is a valid
        # mutation site and must not make the row look mutation-free.
        has_mutations = bool(insertions or deletions or substitutions)

        analyzed_rows.append({
            # A row discarded because of a deletion is never a perfect match.
            'perfect_match': not discard and not has_mutations,
            'insertions': insertions,
            'ambiguous_deletions': [],
            'deletions': deletions,
            'substitutions': substitutions,
            'discard': discard,
        })

    analyzed_df = pd.DataFrame(analyzed_rows, columns=result_columns)

    # Align on a fresh positional index without touching the caller's frame.
    original_df = df.reset_index(drop=True)

    return pd.concat([original_df, analyzed_df], axis=1)